Making choropleths is fun. I will leave at that! Want more explanation? Follow this link.
In this post I will show three way to create choropleths with Python. The Good, the Bad and the Ugly (or in the original: Il buono, il brutto, il cattivo)!
The Ugly: geopandas
Choropleths with geopandas is exactaly like plotting with pandas: very convenient, but cumbersome to customize. There not an easy way to make the plot look good. However, if your goal is quick visualization geopandas is your friend.
import string
import unicodedata
import geopandas
from pandas import read_csv
def remove_accents(s):
"""Python 3 please!"""
return ''.join(x for x in unicodedata.normalize('NFKD', s)
if x in string.ascii_letters).lower()
geo_path = './data/br_states.json'
gdf = geopandas.read_file(geo_path)
gdf['name'] = gdf['name'].str.encode('utf-8')
gdf['name'] = [remove_accents(s.decode('utf-8')) for s in gdf['name']]
df = read_csv('./data/pfas.csv')
df['name'] = [remove_accents(s.decode('utf-8')) for s in df['name']]
gdf.sort('name', inplace=True)
df.sort('name', inplace=True)
gdf['2013'] = df['2013'].values
kw = dict(column='2013', k=6, colormap='YlGn')
ax = gdf.plot(scheme='QUANTILES', **kw)
That is it! We spent more lines loading and cleaning the data. Note that we can change the classification schemes. I for one prefer the Fisher Jenks to enhance the color differences, but is some cases you have to be conservative and use an equal interval. Note that you will need PySAL to use the classification schemes.
ax = gdf.plot(scheme='fisher_jenks', **kw)
ax = gdf.plot(scheme='equal_interval', **kw)
The Good: If you want to publish that figure in a paper you will need to work a little bit more on it with matplotlib and cartopy.
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from pysal.esda.mapclassify import Fisher_Jenks
def norm_cmap(values, cmap, vmin=None, vmax=None):
"""
Normalize and set colormap
Parameters
----------
values : Series or array to be normalized
cmap : matplotlib Colormap
normalize : matplotlib.colors.Normalize
cm : matplotlib.cm
vmin : Minimum value of colormap. If None, uses min(values).
vmax : Maximum value of colormap. If None, uses max(values).
Returns
-------
n_cmap : mapping of normalized values to colormap (cmap)
"""
mn = vmin or min(values)
mx = vmax or max(values)
norm = Normalize(vmin=mn, vmax=mx)
n_cmap = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
return n_cmap
fj = Fisher_Jenks(df['2013'].values, k=6)
cmap = norm_cmap(fj.yb, cmap='YlGn')
df['color'] = [cmap.to_rgba(value) for value in df['2013'].values]
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from cartopy.io import shapereader
states = df['name'].str.decode('utf-8').tolist()
kw = dict(resolution='50m', category='cultural',
name='admin_1_states_provinces')
states_shp = shapereader.natural_earth(**kw)
shp = shapereader.Reader(states_shp)
subplot_kw = dict(projection=ccrs.PlateCarree())
fig, ax = plt.subplots(figsize=(5, 5),
subplot_kw=subplot_kw)
ax.set_extent([-82, -32, -45, 10])
ax.background_patch.set_visible(False)
ax.outline_patch.set_visible(False)
for record, state in zip(shp.records(), shp.geometries()):
name = record.attributes['name'].decode('latin-1')
name = remove_accents(name)
if name in states:
facecolor = df[df['name'] == name]['color']
ax.add_geometries([state], ccrs.PlateCarree(),
facecolor=facecolor, edgecolor='black')
cmap = plt.cm.ScalarMappable(cmap='YlGn')
cmap.set_array([])
cmap.set_clim(-0.5, 6+0.5)
bounds = [0, 1, 2, 3, 4, 5, 6]
norm = plt.cm.colors.BoundaryNorm(bounds, cmap.cmap.N)
kw = dict(orientation='vertical', extend='both', cmap=cmap,
norm=norm, boundaries=[0]+bounds+[13],ticks=bounds,
spacing='proportional')
cax = fig.add_axes([0.4, 0.35, 0.015, 0.2])
cbar = plt.colorbar(cmap, cax=cax, **kw)
fig.savefig('choropleth.png', dpi=150, transparent=True)
plt.close(fig)
import numpy as np
from PIL import Image, ImageChops
def trim(img):
border = Image.new(img.mode, img.size, img.getpixel((0, 0)))
diff = ImageChops.difference(img, border)
diff = ImageChops.add(diff, diff, 2.0, -100)
bbox = diff.getbbox()
if bbox:
img = img.crop(bbox)
return img
img = Image.open('choropleth.png')
img = trim(img)
After some last trimming we have a much nicer figure to put on a paper!
The customizations are natural to matplotlib users. I had a weird issue when
trying to plot with geopandas over a matplotlib ax.
img
The bad·ass: Want it to be pretty? Easy? Web friendly? And interactive? Not a problem! Folium has your back.
import json
import folium
import numpy as np
geo_str = json.dumps(json.load(open(geo_path, 'r')))
threshold_scale = np.linspace(df['2013'].min(),
df['2013'].max(), 6, dtype=int).tolist()
mapa = folium.Map(location=[-15.80, -47.88],
tiles="Mapbox Bright",
zoom_start=3)
mapa.geo_json(geo_str=geo_str,
data=df,
columns=['state', '2013'],
fill_color='YlGn',
key_on='feature.id',
threshold_scale=threshold_scale)
mapa